Feedforward model with one hidden layer.
Trained using only LOB data, without liquidity measures: the input dimension is 60 x 21 = 1260.
The mean squared error is used as the loss function.
Optimizer: Adam
Training set consists of 202 days (~80%)
Validation set consists of 51 days (~20%)
Shown below:
Network structure
Settings used for training
Plots of model performance
import os
import torch
from torchsummary import summary
from feedforward_one_layer import FFNN1
from IPython.display import HTML
def hide_code():
    """Return an IPython ``HTML`` object holding a code-visibility toggle button.

    The embedded script registers ``code_toggle`` to run when the document is
    ready; because ``code_show`` starts as ``true``, that first call hides
    every ``div.input`` element. The form button calls ``code_toggle`` again
    to flip the visibility. The script uses the ``$`` (jQuery-style) global
    of the hosting page.
    """
    toggle_markup = '''<script>code_show=true; function code_toggle() {if (code_show){$('div.input').hide();} else {$('div.input').show();}code_show = !code_show} $( document ).ready(code_toggle);</script><form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>'''
    return HTML(toggle_markup)
# Build the network. No checkpoint weights are loaded into it in this cell.
model = FFNN1()

# Locate the checkpoint file in the working directory: any entry whose name
# contains 'ckpt'. The candidates are sorted so that the choice does not
# depend on the arbitrary order returned by os.listdir().
ckpt_candidates = sorted(name for name in os.listdir() if 'ckpt' in name)
if not ckpt_candidates:
    raise FileNotFoundError(
        f"no file containing 'ckpt' found in {os.getcwd()!r}"
    )
ckpt_name = ckpt_candidates[0]

# map_location remaps the stored tensors to the CPU while loading.
ckpt = torch.load(os.path.join('.', ckpt_name), map_location=torch.device('cpu'))

# Print the flattened input size and the layer summary of the network.
print(f'Input size: {60*21}')
summary(model.float(), input_size=(60, 21))

# Last expression of the notebook cell: displays the code-toggle button.
hide_code()
import pandas as pd
from configs.train_config import cfg
# Each entry: (outer index label, config mapping to read, rows to keep).
config_sections = [
    ('USED IN TRAINING', cfg, ['STOCK', 'LOB', 'LIQ_VARS']),
    ('TRAIN', cfg.TRAIN, ['SHUFFLE', 'VAL_SHUFFLE', 'INTRADAY_SHUFFLE',
                          'SPLIT_RATIO', 'BATCH_SIZE', 'VAL_BATCH_SIZE']),
    ('MODEL', cfg.MODEL, ['BACKBONE', 'LOSS', 'DROPOUT_RATE', 'LEAKY_SLOPE']),
    ('OPTIMIZER', cfg.OPTIMIZER, ['LR', 'METHOD', 'LR_SCHEDULER', 'LAMBDA']),
]

# Turn every section into a one-column frame (config keys become the index)
# restricted to the rows of interest.
section_frames = []
section_labels = []
for section_label, section_mapping, wanted_rows in config_sections:
    section_frame = pd.DataFrame.from_dict(section_mapping, 'index')
    section_frames.append(section_frame.loc[wanted_rows])
    section_labels.append(section_label)

# Stack the sections; `keys` adds the section label as an outer index level.
configuration = pd.concat(section_frames, keys=section_labels)
configuration.columns = ['CONFIGURATIONS']

# The slice drops the first five and the last four characters of the
# checkpoint file name; what remains is reported as the epoch.
print(f'Epoch loaded: {ckpt_name[5:-4]}')

# Last expression of the notebook cell: displays the table.
configuration
import sys
sys.path.append('./../')
import numpy as np
# Keep only the '.npy' entries of the data directory (this drops stray files
# such as .DS_Store) and sort them by name. Building a new list avoids
# popping from the list while iterating over it, which skips the entry that
# follows every removed one.
filenames = sorted(
    name
    for name in os.listdir(cfg.DATA.DATA_PATH)
    if name.split('.')[-1] == 'npy'
)

# Optionally restrict the run to the leading fraction of the files.
if cfg.DATA.PORTION is not None:
    filenames = filenames[:int(len(filenames) * cfg.DATA.PORTION)]

# The first SPLIT_RATIO share of the sorted files is used for training, the
# remainder for validation. max(..., 0) keeps a negative ratio from turning
# into a from-the-end slice (the training set is then empty).
train_count = max(int(len(filenames) * cfg.TRAIN.SPLIT_RATIO), 0)
train_datanames = filenames[:train_count]
val_datanames = filenames[train_count:]
def _load_days(datanames):
    """Load the 'X' and 'y' arrays of every listed file as torch tensors.

    Each file is read with ``np.load(...).item()`` and indexed with the keys
    ``'X'`` and ``'y'``, i.e. it is expected to hold a single pickled mapping.

    Returns:
        A pair ``(features, targets)`` of lists with one tensor per file, in
        the order of ``datanames``.
    """
    features, targets = [], []
    for dataname in datanames:
        # NOTE: allow_pickle=True unpickles arbitrary Python objects; only
        # load data files from a trusted source.
        item = np.load(
            os.path.join(cfg.DATA.DATA_PATH, dataname), allow_pickle=True
        ).item()
        features.append(torch.from_numpy(item['X']))
        targets.append(torch.from_numpy(item['y']))
    return features, targets


X_t, y_t = _load_days(train_datanames)
X_v, y_v = _load_days(val_datanames)
def _predict_all(net, days):
    """Run ``net`` on every sample of every tensor in ``days``, one at a time.

    Each sample gets a leading batch dimension of size 1 and the first (only)
    row of the output is kept. Returns the outputs as a flat list in
    iteration order.
    """
    # NOTE(review): feeding a whole day as one batch would be much faster, but
    # whether the model yields identical results that way is not verified here.
    outputs = []
    for day in days:
        for sample in day:
            outputs.append(net(sample.unsqueeze(0))[0])
    return outputs


def _stack_outputs(outputs, width=5):
    """Concatenate per-sample outputs into a ``(-1, width)`` tensor.

    Raises:
        RuntimeError: if a row of the stacked tensor differs from the output
            it was built from (e.g. the outputs do not have ``width`` values).
    """
    stacked = torch.cat(outputs).reshape(-1, width)
    for position, (single, row) in enumerate(zip(outputs, stacked)):
        if not torch.equal(single, row):
            raise RuntimeError(
                f'stacked prediction row {position} does not match the model '
                f'output it was built from'
            )
    return stacked


def _column_errors(target, prediction):
    """Return per-column ``(mse, mape)`` lists for two 2-D tensors.

    MAPE is computed as ``mean(|1 - prediction / target|) * 100``.
    """
    # NOTE(review): a target value of exactly zero makes the MAPE of its
    # column inf/nan because of the division by the target.
    columns = list(zip(target.transpose(1, 0), prediction.transpose(1, 0)))
    mse = [torch.mean((t - p) ** 2).numpy() for t, p in columns]
    mape = [torch.mean(torch.abs(1 - p / t)).numpy() * 100 for t, p in columns]
    return mse, mape


# Switch the model to float64, load the checkpoint weights and put it in
# inference mode. Gradient tracking is disabled globally from here on.
# NOTE(review): strict=False lets load_state_dict ignore missing or unexpected
# keys, so a mismatching checkpoint is not reported as an error.
model.double()
model.load_state_dict(ckpt['model_state'], strict=False)
torch.set_grad_enabled(False)
model.eval()

# Per-sample predictions for the training and validation sets.
temp = _predict_all(model, X_t)
temp_v = _predict_all(model, X_v)
y_t_pred = _stack_outputs(temp)
y_v_pred = _stack_outputs(temp_v)

# Merge the per-file target tensors into one tensor per set.
y_t = torch.cat(y_t)
y_v = torch.cat(y_v)

# One error value per predicted variable (column).
mse_train, mape_train = _column_errors(y_t, y_t_pred)
mse_val, mape_val = _column_errors(y_v, y_v_pred)
Below are the converged result and the best result of the model, each shown as five plots, one for each of the five variables we wanted to predict. The plots show the model's training and prediction performance throughout the year for each day's trading window, which runs from the 61st trading minute until the last trading minute of the day.
These windows are ordered and stacked, starting from the first trading day's window until the last day's trading window.
On the x-axis we always have the minutes and on the y-axis the corresponding variable of the plot.
from CODES.utils.plotter import plotter
def _column(values, col):
    """Return column ``col`` of a 2-D tensor as a plain list of floats."""
    return values[:, col].detach().tolist()


def _axis_args(col, y_label, title, y_limits, legend_loc):
    """Build the argument list of one axis, one entry per name in ``attrs``.

    Args:
        col: index of the predicted variable (column of the target tensors).
        y_label: y-axis label.
        title: axis title.
        y_limits: ``(low, high)`` for the 'set_ylim' entry, or ``None``.
        legend_loc: ``loc`` value of the legend.
    """
    # Validation curves are drawn after the training samples on the x-axis.
    val_start = len(y_t)
    return [
        # 'plot' x4: training data, model fit, validation data, prediction
        [_column(y_t, col), '-',
         dict(color='blue', linewidth=2, label='training data', alpha=0.5)],
        [_column(y_t_pred, col), '-',
         dict(color='green', linewidth=0.5, label='learned by model')],
        [range(val_start, val_start + len(y_v)), _column(y_v, col), '-',
         dict(color='purple', linewidth=2, label='validation data', alpha=1)],
        [range(val_start, val_start + len(y_v_pred)), _column(y_v_pred, col), '-',
         dict(color='darkorange', linewidth=0.5, label='predicted by model')],
        # 'make_table': error table of this variable
        [dict(cellText=[[mse_train[col], mape_train[col]],
                        [mse_val[col], mape_val[col]]],
              rowLabels=['Training Error', 'Validation Error'],
              colLabels=['Mean Squared', 'Mean Absolute Percentage (%)'],
              loc='lower right'),
         {'row_scale': 2, 'col_scale': 0.5, 'fontsize': 16}],
        # 'set_ylim'
        None if y_limits is None else list(y_limits),
        # 'set_xlabel', 'set_ylabel', 'set_title'
        ['Minutes', dict(fontsize=15)],
        [y_label, dict(fontsize=15)],
        [title, dict(fontsize=20)],
        # 'legend'
        [dict(ncol=1, shadow=1, columnspacing=1, fontsize=20, loc=legend_loc)],
        # 'grid'
        # NOTE(review): Matplotlib renamed the `b` argument of grid() to
        # `visible`; confirm how plotter forwards this dict.
        [dict(b=True, axis='y', alpha=0.5)],
    ]


# (column, y-label, title, y-limits, legend location) of the five axes.
axis_specs = [
    (0, 'Price in TL', 'Mid Price', None, 'upper left'),
    (1, 'Price in TL', 'Bid Price Expectation', None, 'upper left'),
    (2, 'Price in TL', 'Ask Price Expectation', None, 'upper left'),
    (3, 'Variance in TL$^{2}$', 'Bid Price Variance', (-0.0002, 0.0004), 'best'),
    (4, 'Variance in TL$^{2}$', 'Ask Price Variance', (-0.0002, 0.0004), 'best'),
]
args = [_axis_args(*axis_spec) for axis_spec in axis_specs]

# One name per entry of each axis' argument list, in the same order.
attrs = ['plot', 'plot', 'plot', 'plot',
         'make_table',
         'set_ylim',
         'set_xlabel', 'set_ylabel', 'set_title',
         'legend', 'grid']
plotter(args, attrs, fig_title=f'Converged Result of Model\n Stock: GARAN\n Year: 2017\n Epoch:{ckpt_name[5:-4]} ', dpi=300, ncols=1, save_path=os.getcwd())
def _predict_all(net, days):
    """Run ``net`` on every sample of every tensor in ``days``, one at a time.

    Each sample gets a leading batch dimension of size 1 and the first (only)
    row of the output is kept. Returns the outputs as a flat list in
    iteration order.
    """
    outputs = []
    for day in days:
        for sample in day:
            outputs.append(net(sample.unsqueeze(0))[0])
    return outputs


def _stack_outputs(outputs, width=5):
    """Concatenate per-sample outputs into a ``(-1, width)`` tensor.

    Raises:
        RuntimeError: if a row of the stacked tensor differs from the output
            it was built from (e.g. the outputs do not have ``width`` values).
    """
    stacked = torch.cat(outputs).reshape(-1, width)
    for position, (single, row) in enumerate(zip(outputs, stacked)):
        if not torch.equal(single, row):
            raise RuntimeError(
                f'stacked prediction row {position} does not match the model '
                f'output it was built from'
            )
    return stacked


def _column_errors(target, prediction):
    """Return per-column ``(mse, mape)`` lists for two 2-D tensors.

    MAPE is computed as ``mean(|1 - prediction / target|) * 100``.
    """
    # NOTE(review): a target value of exactly zero makes the MAPE of its
    # column inf/nan because of the division by the target.
    columns = list(zip(target.transpose(1, 0), prediction.transpose(1, 0)))
    mse = [torch.mean((t - p) ** 2).numpy() for t, p in columns]
    mape = [torch.mean(torch.abs(1 - p / t)).numpy() * 100 for t, p in columns]
    return mse, mape


# Locate the checkpoint inside ./mvp_epochs: any entry whose name contains
# 'ckpt'. The candidates are sorted so that the choice does not depend on the
# arbitrary order returned by os.listdir().
mvp_dir = './mvp_epochs'
ckpt_candidates = sorted(name for name in os.listdir(mvp_dir) if 'ckpt' in name)
if not ckpt_candidates:
    raise FileNotFoundError(f"no file containing 'ckpt' found in {mvp_dir!r}")
ckpt_name = ckpt_candidates[0]
ckpt = torch.load(mvp_dir + '/' + ckpt_name, map_location=torch.device('cpu'))

# Load the weights of this checkpoint into the existing model and keep it in
# float64 inference mode with gradient tracking disabled.
# NOTE(review): strict=False lets load_state_dict ignore missing or unexpected
# keys, so a mismatching checkpoint is not reported as an error.
model.double()
model.load_state_dict(ckpt['model_state'], strict=False)
model.eval()
torch.set_grad_enabled(False)

# Per-sample predictions for the training and validation sets.
temp = _predict_all(model, X_t)
temp_v = _predict_all(model, X_v)
y_t_pred = _stack_outputs(temp)
y_v_pred = _stack_outputs(temp_v)

# One error value per predicted variable (column); y_t and y_v are used as
# they are, without further concatenation.
mse_train, mape_train = _column_errors(y_t, y_t_pred)
mse_val, mape_val = _column_errors(y_v, y_v_pred)
def _column(values, col):
    """Return column ``col`` of a 2-D tensor as a plain list of floats."""
    return values[:, col].detach().tolist()


def _axis_args(col, y_label, title, y_limits, legend_loc):
    """Build the argument list of one axis, one entry per name in ``attrs``.

    Args:
        col: index of the predicted variable (column of the target tensors).
        y_label: y-axis label.
        title: axis title.
        y_limits: ``(low, high)`` for the 'set_ylim' entry, or ``None``.
        legend_loc: ``loc`` value of the legend.
    """
    # Validation curves are drawn after the training samples on the x-axis.
    val_start = len(y_t)
    return [
        # 'plot' x4: training data, model fit, validation data, prediction
        [_column(y_t, col), '-',
         dict(color='blue', linewidth=2, label='training data', alpha=0.5)],
        [_column(y_t_pred, col), '-',
         dict(color='green', linewidth=0.5, label='learned by model')],
        [range(val_start, val_start + len(y_v)), _column(y_v, col), '-',
         dict(color='purple', linewidth=2, label='validation data', alpha=1)],
        [range(val_start, val_start + len(y_v_pred)), _column(y_v_pred, col), '-',
         dict(color='darkorange', linewidth=0.5, label='predicted by model')],
        # 'make_table': error table of this variable
        [dict(cellText=[[mse_train[col], mape_train[col]],
                        [mse_val[col], mape_val[col]]],
              rowLabels=['Training Error', 'Validation Error'],
              colLabels=['Mean Squared', 'Mean Absolute Percentage (%)'],
              loc='lower right'),
         {'row_scale': 2, 'col_scale': 0.5, 'fontsize': 16}],
        # 'set_ylim'
        None if y_limits is None else list(y_limits),
        # 'set_xlabel', 'set_ylabel', 'set_title'
        ['Minutes', dict(fontsize=15)],
        [y_label, dict(fontsize=15)],
        [title, dict(fontsize=20)],
        # 'legend'
        [dict(ncol=1, shadow=1, columnspacing=1, fontsize=20, loc=legend_loc)],
        # 'grid'
        # NOTE(review): Matplotlib renamed the `b` argument of grid() to
        # `visible`; confirm how plotter forwards this dict.
        [dict(b=True, axis='y', alpha=0.5)],
    ]


# (column, y-label, title, y-limits, legend location) of the five axes.
axis_specs = [
    (0, 'Price in TL', 'Mid Price', None, 'upper left'),
    (1, 'Price in TL', 'Bid Price Expectation', None, 'upper left'),
    (2, 'Price in TL', 'Ask Price Expectation', None, 'upper left'),
    (3, 'Variance in TL$^{2}$', 'Bid Price Variance', (-0.0002, 0.0004), 'best'),
    (4, 'Variance in TL$^{2}$', 'Ask Price Variance', (-0.0002, 0.0004), 'best'),
]
args = [_axis_args(*axis_spec) for axis_spec in axis_specs]

# One name per entry of each axis' argument list, in the same order.
attrs = ['plot', 'plot', 'plot', 'plot',
         'make_table',
         'set_ylim',
         'set_xlabel', 'set_ylabel', 'set_title',
         'legend', 'grid']
plotter(args, attrs, fig_title=f'Best Result of Model\n Stock: GARAN\n Year: 2017\n Epoch:{ckpt_name[5:-4]} ', dpi=300, ncols=1, save_path=os.getcwd())